This vignette shows how to use Signac with Seurat. There are three parts: Seurat, Signac and then visualization. We use an example CITE-seq data set from 10X Genomics.
Start with the standard pre-processing steps for a Seurat object.
library(Seurat)
Download data from 10X Genomics.
download.file("https://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_10k_protein_v3/pbmc_10k_protein_v3_filtered_feature_bc_matrix.h5", destfile = "pbmc_10k_protein_v3_filtered_feature_bc_matrix.h5")
Create a Seurat object, and then perform SCTransform normalization. Note:
# load dataset
E = Read10X_h5(filename = "pbmc_10k_protein_v3_filtered_feature_bc_matrix.h5")
pbmc <- CreateSeuratObject(counts = E$`Gene Expression`, project = "pbmc")
# store mitochondrial percentage in object meta data
pbmc <- PercentageFeatureSet(pbmc, pattern = "^MT-", col.name = "percent.mt")
# run sctransform
pbmc <- SCTransform(pbmc,vars.to.regress = "percent.mt", verbose = FALSE)
Perform dimensionality reduction by PCA and UMAP embedding. Note:
# These are now standard steps in the Seurat workflow for visualization and clustering
pbmc <- RunPCA(pbmc, verbose = FALSE)
pbmc <- RunUMAP(pbmc, dims = 1:30, verbose = FALSE)
pbmc <- FindNeighbors(pbmc, dims = 1:30, verbose = FALSE)
First, make sure you have the Signac package installed.
devtools::install_github("mathewchamberlain/Signac")
library(Signac)
Generate Signac labels for the Seurat object. Note:
# Run Signac
data("training_HPCA")
labels <- Signac(pbmc, R = training_HPCA)
celltypes = Generate_lbls(labels, E = pbmc)
Now we can visualize the cell type classifications at many different levels: Immune and nonimmune
pbmc <- Seurat::AddMetaData(pbmc, metadata=celltypes$Immune, col.name = "immmune")
pbmc <- Seurat::SetIdent(pbmc, value='immmune')
DimPlot(pbmc)
pbmc <- Seurat::AddMetaData(pbmc, metadata=celltypes$L2, col.name = "celltypes")
pbmc <- Seurat::SetIdent(pbmc, value='celltypes')
DimPlot(pbmc)
lbls = factor(celltypes$CellTypes)
levels(lbls) <- sort(unique(lbls))
pbmc <- Seurat::AddMetaData(pbmc, metadata=lbls, col.name = "celltypes")
pbmc <- Seurat::SetIdent(pbmc, value='celltypes')
DimPlot(pbmc)
lbls = factor(celltypes$CellStates)
levels(lbls) <- sort(unique(lbls))
pbmc <- Seurat::AddMetaData(pbmc, metadata=lbls, col.name = "celltypes")
pbmc <- Seurat::SetIdent(pbmc, value='celltypes')
DimPlot(pbmc)
lbls = factor(celltypes$CellStates_novel)
levels(lbls) <- sort(unique(lbls))
pbmc <- Seurat::AddMetaData(pbmc, metadata=lbls, col.name = "celltypes")
pbmc <- Seurat::SetIdent(pbmc, value='celltypes')
DimPlot(pbmc)
Add protein expression information
pbmc[["ADT"]] <- CreateAssayObject(counts = E$`Antibody Capture`[,colnames(E$`Antibody Capture`) %in% colnames(pbmc)])
## Warning: Feature names cannot have underscores ('_'), replacing with dashes ('-')
pbmc <- NormalizeData(pbmc, assay = "ADT", normalization.method = "CLR")
pbmc <- ScaleData(pbmc, assay = "ADT")
Visualize protein information on the RNA clustering result
FeaturePlot(pbmc, features = c("CD8a-TotalSeqB", "CD8A"), ncol = 2)
## Warning: Could not find CD8a-TotalSeqB in the default search locations, found in ADT assay instead
RidgePlot(pbmc, features = c("CD8a-TotalSeqB"))
## Warning: Could not find CD8a-TotalSeqB in the default search locations, found in ADT assay instead
Identify differentially expressed proteins between clusters
# Downsample the clusters to a maximum of 500 cells each (makes the heatmap easier to see for
# small clusters)
pbmc.small <- subset(pbmc, downsample = 500)
# Find protein markers for all clusters, and draw a heatmap
adt.markers <- FindAllMarkers(pbmc.small, assay = "ADT", only.pos = TRUE, verbose = F)
DoHeatmap(pbmc.small, features = unique(adt.markers$gene), assay = "ADT", angle = 45) + NoLegend()
Save results
saveRDS(pbmc, file = "citeseq_signac.rds")
## R version 3.5.0 (2018-04-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
##
## Matrix products: default
## BLAS: /usr/lib64/libblas.so.3.4.2
## LAPACK: /site/ne/app/x86_64/R/v3.5.0/lib64/R/lib/libRlapack.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8
## [4] LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C
## [10] LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] bzinb_1.0.4 Signac_2.0.7 pscl_1.5.5 MASS_7.3-51.1 forcats_0.5.0
## [6] stringr_1.4.0 dplyr_0.8.5 purrr_0.3.3 readr_1.3.1 tidyr_1.1.1
## [11] tibble_3.0.3 tidyverse_1.3.0 sctransform_0.2.1 Seurat_3.2.0 ggplot2_3.3.2
## [16] veni_1.0.3 Matrix.utils_0.9.8 Matrix_1.2-18 biomaRt_2.38.0
##
## loaded via a namespace (and not attached):
## [1] reticulate_1.16-9001 neuralnet_1.44.2 lme4_1.1-23 tidyselect_1.1.0
## [5] RSQLite_2.2.0 AnnotationDbi_1.44.0 htmlwidgets_1.5.1 grid_3.5.0
## [9] BiocParallel_1.16.6 Rtsne_0.15 munsell_0.5.0 codetools_0.2-16
## [13] ica_1.0-2 statmod_1.4.34 future_1.16.0 miniUI_0.1.1.1
## [17] withr_2.1.2 colorspace_1.4-0 GOSemSim_2.8.0 Biobase_2.42.0
## [21] knitr_1.28 rstudioapi_0.11 stats4_3.5.0 ROCR_1.0-7
## [25] tensor_1.5 DOSE_3.8.2 listenv_0.8.0 labeling_0.3
## [29] urltools_1.7.3 polyclip_1.10-0 bit64_0.9-7 farver_2.0.3
## [33] rhdf5_2.27.13 vctrs_0.3.2 generics_0.0.2 xfun_0.12
## [37] randomForest_4.6-14 R6_2.4.1 graphlayouts_0.6.0 rsvd_1.0.3
## [41] RJSONIO_1.3-1.4 bitops_1.0-6 spatstat.utils_1.17-0 fgsea_1.8.0
## [45] gridGraphics_0.5-0 assertthat_0.2.1 promises_1.1.0 scales_1.1.0
## [49] ggraph_2.0.3 enrichplot_1.2.0 gtable_0.3.0 npsurv_0.4-0
## [53] globals_0.12.5 goftest_1.2-2 tidygraph_1.2.0 rlang_0.4.8
## [57] aplpack_1.3.3 splines_3.5.0 lazyeval_0.2.2 broom_0.7.0
## [61] europepmc_0.4 checkmate_2.0.0 yaml_2.2.1 BiocManager_1.30.10
## [65] reshape2_1.4.4 abind_1.4-5 modelr_0.1.8 backports_1.1.6
## [69] rsconnect_0.8.16 httpuv_1.5.2 qvalue_2.14.1 tools_3.5.0
## [73] tcltk_3.5.0 ggplotify_0.0.5 ellipsis_0.3.0 gplots_3.0.3
## [77] RColorBrewer_1.1-2 BiocGenerics_0.28.0 ggridges_0.5.2 Rcpp_1.0.4.6
## [81] plyr_1.8.6 progress_1.2.2 RCurl_1.98-1.1 prettyunits_1.1.1
## [85] rpart_4.1-15 deldir_0.1-28 pbapply_1.4-2 viridis_0.5.1
## [89] cowplot_1.0.0 S4Vectors_0.20.1 zoo_1.8-4 grr_0.9.5
## [93] haven_2.3.1 ggrepel_0.8.2 cluster_2.1.0 fs_1.4.1
## [97] magrittr_1.5 RSpectra_0.16-0 data.table_1.13.0 DO.db_2.9
## [101] reprex_0.3.0 lmtest_0.9-37 triebeard_0.3.0 RANN_2.6.1
## [105] reactome.db_1.66.0 packrat_0.5.0 fitdistrplus_1.0-14 evaluate_0.14
## [109] hms_0.5.3 patchwork_1.0.1 lsei_1.2-0 mime_0.9
## [113] xtable_1.8-4 XML_3.99-0.3 readxl_1.3.1 IRanges_2.16.0
## [117] gridExtra_2.3 compiler_3.5.0 KernSmooth_2.23-16 crayon_1.3.4
## [121] minqa_1.2.4 htmltools_0.4.0 mgcv_1.8-31 later_1.0.0
## [125] lubridate_1.7.8 ReactomePA_1.26.0 DBI_1.1.0 tweenr_1.0.1
## [129] dbplyr_1.4.4 rappdirs_0.3.1 boot_1.3-24 cli_2.0.2
## [133] gdata_2.18.0 parallel_3.5.0 igraph_1.2.5 pkgconfig_2.0.3
## [137] rvcheck_0.1.8 plotly_4.9.2.1 xml2_1.3.1 rvest_0.3.5
## [141] digest_0.6.18 RcppAnnoy_0.0.16 graph_1.60.0 spatstat.data_1.4-3
## [145] rmarkdown_2.1 cellranger_1.1.0 leiden_0.3.3 fastmatch_1.1-0
## [149] uwot_0.1.8 shiny_1.4.0.2 gtools_3.8.1 graphite_1.28.2
## [153] nloptr_1.2.2.1 glasso_1.11 lifecycle_0.2.0 nlme_3.1-145
## [157] jsonlite_1.6.1 Rhdf5lib_1.4.3 limma_3.38.3 viridisLite_0.3.0
## [161] fansi_0.4.1 pillar_1.4.3 lattice_0.20-41 fastmap_1.0.1
## [165] httr_1.4.1 survival_3.2-3 GO.db_3.7.0 glue_1.4.0
## [169] UpSetR_1.4.0 spatstat_1.64-1 png_0.1-7 bit_1.1-15.2
## [173] class_7.3-16 ggforce_0.3.2 stringi_1.4.6 blob_1.2.1
## [177] caTools_1.17.1.1 memoise_1.1.0 e1071_1.7-3 irlba_2.3.3
## [181] future.apply_1.4.0 ape_5.3
A work by Mathew Chamberlain
mathew.chamberlain@sanofi.com